/*
 * Copyright (C) 2022 Samsung Electronics Co. LTD
 *
 * This software is proprietary of Samsung Electronics.
 * No part of this software, either material or conceptual may be copied or
 * distributed, transmitted, transcribed, stored in a retrieval system or
 * translated into any human or computer language in any form by any means,
 * electronic, mechanical, manual or otherwise, or disclosed to third parties
 * without the express written permission of Samsung Electronics.
 *
 */

#include "tools/datagen/sls/general/tables_info.h"
#include "tools/datagen/sls/tables_generator/factory.h"

#include "CLI/App.hpp"
#include "CLI/CLI.hpp" // NOLINT(misc-include-cleaner)
#include "CLI/Error.hpp"
#include "CLI/Validators.hpp"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <limits>
#include <optional>
#include <random>
#include <string>
#include <utility>
#include <vector>

// Entry point of the synthetic SLS tables generator.
//
// Parses the command line, draws a random row count for every table from the
// inclusive range [min_table_size, max_table_size], and passes the resulting
// tables description to the selected generator together with the root path.
//
// Returns 0 on success. On a command-line error (including the range checks
// performed below) returns the exit code produced by CLI::App::exit.
int main(int argc, char **argv) {
  CLI::App app{
      "Utility to create synthetic tables data for testing SLS operation"};

  std::filesystem::path tables_root;
  size_t num_tables{};
  std::optional<size_t> min_table_size{};
  size_t max_table_size{};
  size_t sparse_feature_size{};
  std::string generator_name;
  std::string generator_args;

  // If min_table_size is not set, it is evaluated as
  // ROWS_COUNT_MIN_FRACTION * max_table_size (rounded down).
  constexpr static auto ROWS_COUNT_MIN_FRACTION = 0.75;
  // Row counts are stored as uint32_t, so larger values cannot be represented.
  constexpr static size_t ROWS_COUNT_LIMIT =
      std::numeric_limits<uint32_t>::max();

  app.add_option("path", tables_root,
                 "Path to the root directory where test tables will be located")
      ->check(CLI::ExistingDirectory)
      ->required();
  app.add_option("-n,--num_tables", num_tables, "Number of embedding tables")
      ->default_val(1);
  // default_str only affects the help text; the actual fallback is computed
  // after parsing because it depends on max_table_size.
  app.add_option("--ms,--min_table_size", min_table_size,
                 "Min number of rows for each table")
      ->default_str("0.75 * max_table_size");
  // --max_num_lookup is kept as an alias for backward compatibility with
  // existing invocations; --max_table_size matches what the option controls.
  app.add_option("--Ms,--max_table_size,--max_num_lookup", max_table_size,
                 "Max number of rows for each table")
      ->default_val(100);
  app.add_option("-c,--cols", sparse_feature_size,
                 "Number of columns in each table (sparse_feature_size)")
      ->default_val(16);
  app.add_option("-g,--generator", generator_name,
                 "Name of generator that creates indices.\n"
                 "    random = random values\n"
                 "    position = values generated by equation (tid << 16) + "
                 "(rid << 8) + cid\n"
                 "    float_tapp = float values from sample_app (arguments: "
                 "knum_idx_values, sparse_feature_size)\n"
                 "    uint32_t_tapp = uint32_t values from sample_app "
                 "(arguments: knum_idx_values, sparse_feature_size)\n"
                 "    fixed = fixed values (arguments: value)")
      ->check(CLI::IsMember(
          {"random", "position", "float_tapp", "uint32_t_tapp", "fixed"}))
      ->default_val("random");
  app.add_option("--ga,--gargs", generator_args, "Arguments for generator");

  try {
    app.parse(argc, argv);

    // Cross-option checks are reported through the same CLI11 error path as
    // ordinary parse errors, so the user gets a uniform message and exit code.
    if (max_table_size > ROWS_COUNT_LIMIT) {
      throw CLI::ValidationError(
          "--max_table_size",
          "value " + std::to_string(max_table_size) +
              " exceeds the supported maximum of " +
              std::to_string(ROWS_COUNT_LIMIT));
    }
    // std::uniform_int_distribution requires min <= max; violating that is
    // undefined behaviour, so reject such input explicitly.
    if (min_table_size && *min_table_size > max_table_size) {
      throw CLI::ValidationError(
          "--min_table_size",
          "value " + std::to_string(*min_table_size) +
              " must not exceed the max table size (" +
              std::to_string(max_table_size) + ")");
    }
  } catch (const CLI::ParseError &e) {
    return app.exit(e);
  }

  auto table_generator =
      tools::gen::sls::TablesGeneratorFactory::default_factory().create(
          generator_name, generator_args);

  // Both bounds were verified above to fit into uint32_t, so the narrowing
  // casts below cannot lose information.
  const auto max_rows = static_cast<uint32_t>(max_table_size);
  const auto min_rows =
      min_table_size
          ? static_cast<uint32_t>(*min_table_size)
          : static_cast<uint32_t>(static_cast<double>(max_rows) *
                                  ROWS_COUNT_MIN_FRACTION);

  // One row count per table, drawn uniformly from [min_rows, max_rows] with a
  // non-deterministically seeded engine.
  std::vector<uint32_t> rows(num_tables);
  std::mt19937_64 engine{std::random_device{}()};
  std::uniform_int_distribution<uint32_t> rows_distr(min_rows, max_rows);
  std::generate(rows.begin(), rows.end(),
                [&engine, &rows_distr]() { return rows_distr(engine); });

  const tools::gen::sls::TablesInfo info(num_tables, sparse_feature_size,
                                         std::move(rows));
  table_generator->create_and_store(tables_root, info);
  return 0;
}
